import pandas as pd
import numpy as np
import missingno as msno
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
import statsmodels.api as sm
import math
import plotly.express as px #graphing
import plotly.graph_objects as go #graphing
from plotly.subplots import make_subplots #graphing
from datetime import datetime, timedelta
#!pip install missingno
# Directory holding the exercise data. NOTE(review): this is a machine-specific
# absolute path; adjust it (or make it configurable) before running elsewhere.
path = "/Users/ycq/Downloads/Principal/"
# os.path.join avoids the doubled separator that `path + "/Quant Exercise.csv"`
# produced, since `path` already ends with a slash.
df = pd.read_csv(os.path.join(path, "Quant Exercise.csv"))
# Quick look at the raw frame: first rows, column summary (df.info() prints its
# report and returns None, hence the None in the displayed tuple), and shape.
df.head(), df.info(), df.shape
<class 'pandas.core.frame.DataFrame'> RangeIndex: 68 entries, 0 to 67 Columns: 551 entries, DP03_0001E to fips dtypes: float64(300), int64(249), object(2) memory usage: 292.8+ KB
( DP03_0001E DP03_0001M DP03_0001PE DP03_0001PM DP03_0002E DP03_0002M \
0 84387 137 84387.0 NaN 53245.0 910
1 1015608 631 1015608.0 NaN 660484.0 3044
2 54444 101 54444.0 NaN 31615.0 519
3 136682 202 136682.0 NaN 84153.0 1035
4 40064 86 40064.0 NaN 23144.0 447
DP03_0002PE DP03_0002PM DP03_0003E DP03_0003M ... DP03_0136M \
0 63.1 1.1 53200 911 ... NaN
1 65.0 0.3 659555 3038 ... NaN
2 58.1 1.0 31593 518 ... NaN
3 61.6 0.7 84051 1040 ... NaN
4 57.8 1.1 23140 447 ... NaN
DP03_0136PE DP03_0136PM DP03_0137E DP03_0137M DP03_0137PE DP03_0137PM \
0 6.2 0.8 NaN NaN 17.4 1.8
1 7.7 0.4 NaN NaN 21.3 0.7
2 8.4 1.1 NaN NaN 23.8 2.2
3 6.8 0.7 NaN NaN 20.8 1.3
4 8.0 1.2 NaN NaN 25.0 3.2
GEO_ID county fips
0 0500000US42001 Adams 42001.0
1 0500000US42003 Allegheny 42003.0
2 0500000US42005 Armstrong 42005.0
3 0500000US42007 Beaver 42007.0
4 0500000US42009 Bedford 42009.0
[5 rows x 551 columns],
None,
(68, 551))
# Descriptive statistics, the column index, and the per-column count of missing values.
df.describe(), df.columns, df.isna().sum()
( total_population DP03_0001M DP03_0001PE labor_force \
count 6.700000e+01 67.000000 6.700000e+01 66.000000
mean 1.560604e+05 230.701493 1.560674e+05 96172.303030
std 2.234164e+05 201.079512 2.234279e+05 144659.556679
min 3.880000e+03 30.000000 3.880000e+03 1493.000000
25% 3.464200e+04 95.500000 3.464200e+04 19214.500000
50% 7.106000e+04 157.000000 7.106000e+04 39691.500000
75% 1.720595e+05 315.000000 1.720595e+05 102158.250000
max 1.272154e+06 1209.000000 1.272154e+06 783168.000000
DP03_0002M DP03_0002PE DP03_0002PM civ_labor_force DP03_0003M \
count 67.000000 67.000000 67.000000 67.000000 67.000000
mean 1008.044776 58.907463 1.094030 97881.895522 1009.134328
std 866.090177 6.430828 0.634102 144187.169841 865.600000
min 119.000000 21.500000 0.300000 1493.000000 119.000000
25% 439.500000 56.050000 0.700000 19409.500000 438.500000
50% 767.000000 59.400000 1.000000 39824.000000 766.000000
75% 1362.000000 62.550000 1.300000 104556.500000 1364.500000
max 5667.000000 68.300000 4.400000 782637.000000 5648.000000
DP03_0003PE ... DP03_0133PM DP03_0134PE DP03_0134PM DP03_0135PE \
count 67.000000 ... 67.000000 67.000000 67.000000 67.000000
mean 58.832836 ... 1.001493 11.685075 1.235821 7.947761
std 6.409756 ... 0.544808 3.598706 0.774343 2.047420
min 21.500000 ... 0.300000 5.300000 0.300000 4.300000
25% 56.050000 ... 0.600000 9.300000 0.800000 6.850000
50% 59.200000 ... 0.900000 11.200000 1.100000 7.900000
75% 62.550000 ... 1.200000 13.100000 1.500000 8.850000
max 68.200000 ... 3.000000 22.900000 4.400000 18.100000
DP03_0135PM DP03_0136PE DP03_0136PM DP03_0137PE DP03_0137PM \
count 67.000000 67.000000 67.000000 67.000000 67.000000
mean 1.465672 8.834328 1.258209 23.802985 2.444776
std 0.738662 2.852939 0.677372 4.962312 1.273302
min 0.400000 3.700000 0.400000 15.300000 0.700000
25% 0.850000 6.800000 0.800000 20.650000 1.500000
50% 1.400000 8.400000 1.200000 23.300000 2.200000
75% 1.700000 10.050000 1.500000 25.850000 2.900000
max 3.800000 20.400000 4.200000 44.300000 7.000000
fips
count 67.000000
mean 42067.000000
std 38.970074
min 42001.000000
25% 42034.000000
50% 42067.000000
75% 42100.000000
max 42133.000000
[8 rows x 458 columns],
Index(['total_population', 'DP03_0001M', 'DP03_0001PE', 'labor_force',
'DP03_0002M', 'DP03_0002PE', 'DP03_0002PM', 'civ_labor_force',
'DP03_0003M', 'DP03_0003PE',
...
'DP03_0134PM', 'DP03_0135PE', 'DP03_0135PM', 'DP03_0136PE',
'DP03_0136PM', 'DP03_0137PE', 'DP03_0137PM', 'GEO_ID', 'county',
'fips'],
dtype='object', length=460),
total_population 0
DP03_0001M 0
DP03_0001PE 0
labor_force 1
DP03_0002M 0
..
DP03_0137PE 0
DP03_0137PM 0
GEO_ID 0
county 0
fips 0
Length: 460, dtype: int64)
# Show the dtype pandas inferred for every column.
df.dtypes
total_population int64
DP03_0001M int64
DP03_0001PE float64
labor_force float64
DP03_0002M int64
...
DP03_0137PE float64
DP03_0137PM float64
GEO_ID object
county object
fips float64
Length: 460, dtype: object
The raw column headers (for example, `DP03_0001E`) do not describe the data they hold. I identified the columns I wanted to use and created a dictionary, shown below, that maps each original header to a descriptive name.
I also added a FIPS county code column to the dataset for each county in Pennsylvania. A FIPS county code is a five-digit Federal Information Processing Standards code that uniquely identifies a county in the United States. We can use the FIPS codes together with GeoJSON county boundaries to create choropleth maps.
# Mapping of raw Census column codes to descriptive column names.
# All of the renamed columns are ESTIMATES from the U.S. Census Bureau.
# Columns not renamed include: Percent (PE), Margin of Error (M), Percent Margin of Error (PM).
# The mapping is named `column_renames` rather than `dict` so the builtin
# `dict` type is not shadowed for the rest of the session.
column_renames = {
    # Employment Status
    # Population 16 years and over
    "DP03_0001E": "total_population",  # Total population eligible for work
    "DP03_0002E": "labor_force",
    "DP03_0003E": "civ_labor_force",
    "DP03_0004E": "total_employed",
    "DP03_0005E": "total_unemployed",
    "DP03_0006E": "armed_forces",
    "DP03_0007E": "not_in_labor_force",
    # Females 16 years and over
    "DP03_0010E": "total_population_female",  # Total population eligible for work
    "DP03_0011E": "labor_force_female",
    "DP03_0012E": "civ_labor_force_female",
    "DP03_0013E": "civ_labor_force_female_employed",
    # Households with children
    "DP03_0014E": "household_children_under_6",  # Own children of the householder under 6 years
    # All parents in family in labor force
    "DP03_0015E": "parents_work_children_under_6",  # Own children of the householder under 6 years
    "DP03_0016E": "household_children_6to17",  # Own children of the householder 6 to 17 years
    # All parents in family in labor force
    "DP03_0017E": "parents_work_children_6to17",  # Own children of the householder 6 to 17 years
    # Commuting to work
    "DP03_0018E": "total_workers_commute",
    "DP03_0019E": "solo_vehicle_commute",  # Car, truck, or van -- drove alone
    "DP03_0020E": "carpool_commute",  # Car, truck, or van -- carpooled
    "DP03_0021E": "public_transportation_commute",  # Public transportation (excluding taxicab)
    "DP03_0022E": "walked_commute",
    "DP03_0023E": "other_means_commute",
    "DP03_0024E": "worked_from_home",
    "DP03_0025E": "mean_commute_time_minutes",
    # Occupation
    "DP03_0027E": "manage_business_sci_art",  # Management, business, science, and arts occupations
    "DP03_0028E": "service_occupations",
    "DP03_0029E": "sales_and_office_occupations",
    # Natural resources, construction, and maintenance occupations
    "DP03_0030E": "nr_construction_and_maintenance",
    # Production, transportation, and material moving occupations
    "DP03_0031E": "production_transportation_mm",
    # Industry
    "DP03_0033E": "ag_forest_fish_hunt_mine",  # Agriculture, forestry, fishing and hunting, and mining
    "DP03_0034E": "construction",
    "DP03_0035E": "manufacturing",
    "DP03_0036E": "wholesale_trade",
    "DP03_0037E": "retail_trade",
    "DP03_0038E": "transportation_warehousing_utilities",
    "DP03_0039E": "information",
    "DP03_0040E": "firerl",  # Finance, insurance, real estate, rental and leasing
    # Professional, scientific, and management, and administrative and waste management services
    "DP03_0041E": "psmawms",
    # Educational services, and health care and social assistance
    "DP03_0042E": "education_health_care_social",
    # Arts, entertainment, and recreation, and accommodation and food services
    "DP03_0043E": "art_entertainment_accommodation",
    "DP03_0044E": "other_services",  # Other services, except public administration
    "DP03_0045E": "public_administration",
    # Class of worker
    "DP03_0047E": "private_wage_and_salary_worker",
    "DP03_0048E": "government_worker",
    "DP03_0049E": "self_employed_worker",  # Self-employed in own not incorporated business workers
    "DP03_0050E": "unpaid_family_worker",
    # Income and benefits (in 2020 inflation-adjusted dollars)
    # Total households
    "DP03_0051E": "total_households",
    "DP03_0052E": "household_less_than_10k",
    "DP03_0053E": "household_10k_to_15k",  # $10,000 to $14,999
    "DP03_0054E": "household_15k_to_25k",  # $15,000 to $24,999
    "DP03_0055E": "household_25k_to_35k",  # $25,000 to $34,999
    "DP03_0056E": "household_35k_to_50k",  # $35,000 to $49,999
    "DP03_0057E": "household_50k_to_75k",  # $50,000 to $74,999
    "DP03_0058E": "household_75k_to_100k",  # $75,000 to $99,999
    "DP03_0059E": "household_100k_to_150k",  # $100,000 to $149,999
    "DP03_0060E": "household_150k_to_200k",  # $150,000 to $199,999
    "DP03_0061E": "household_200k_plus",  # $200,000 or more
    "DP03_0062E": "household_median_income",  # dollars
    "DP03_0063E": "household_mean_income",  # dollars
    # Families
    "DP03_0075E": "total_families",
    "DP03_0076E": "family_less_than_10k",
    "DP03_0077E": "family_10k_to_15k",  # $10,000 to $14,999
    "DP03_0078E": "family_15k_to_25k",  # $15,000 to $24,999
    "DP03_0079E": "family_25k_to_35k",  # $25,000 to $34,999
    "DP03_0080E": "family_35k_to_50k",  # $35,000 to $49,999
    "DP03_0081E": "family_50k_to_75k",  # $50,000 to $74,999
    "DP03_0082E": "family_75k_to_100k",  # $75,000 to $99,999
    "DP03_0083E": "family_100k_to_150k",  # $100,000 to $149,999
    "DP03_0084E": "family_150k_to_200k",  # $150,000 to $199,999
    "DP03_0085E": "family_200k_plus",  # $200,000 or more
    "DP03_0086E": "family_median_income",  # dollars
    "DP03_0087E": "family_mean_income",  # dollars
    "DP03_0088E": "per_capita_income",
    # Nonfamily Households
    "DP03_0089E": "total_nonfamily_households",
    "DP03_0090E": "nonfamily_median_income",  # dollars
    "DP03_0091E": "nonfamily_mean_income",  # dollars
    # Median Earnings
    "DP03_0092E": "median_earnings_for_workers",  # dollars
    "DP03_0093E": "median_earnings_male_fulltime",  # dollars
    "DP03_0094E": "median_earnings_female_fulltime",  # dollars
    # Health Insurance Coverage
    "DP03_0095E": "total_civ_population",  # Total Civilian Noninstitutionalized Population
    "DP03_0096E": "civ_health_insurance_coverage",  # Population
    "DP03_0097E": "civ_private_health_insurance",  # Population
    "DP03_0098E": "civ_public_health_insurance",  # Population
    "DP03_0099E": "civ_no_health_insurance",  # Population
}

# Apply the descriptive names, then drop every column that is entirely empty.
df = df.rename(columns=column_renames).dropna(axis=1, how="all")
df.head()
| total_population | DP03_0001M | DP03_0001PE | labor_force | DP03_0002M | DP03_0002PE | DP03_0002PM | civ_labor_force | DP03_0003M | DP03_0003PE | ... | DP03_0134PM | DP03_0135PE | DP03_0135PM | DP03_0136PE | DP03_0136PM | DP03_0137PE | DP03_0137PM | GEO_ID | county | fips | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 84387 | 137 | 84387.0 | 53245.0 | 910 | 63.1 | 1.1 | 53200 | 911 | 63.0 | ... | 0.8 | 5.7 | 0.9 | 6.2 | 0.8 | 17.4 | 1.8 | 0500000US42001 | Adams | 42001.0 |
| 1 | 1015608 | 631 | 1015608.0 | 660484.0 | 3044 | 65.0 | 0.3 | 659555 | 3038 | 64.9 | ... | 0.3 | 8.5 | 0.6 | 7.7 | 0.4 | 21.3 | 0.7 | 0500000US42003 | Allegheny | 42003.0 |
| 2 | 54444 | 101 | 54444.0 | 31615.0 | 519 | 58.1 | 1.0 | 31593 | 518 | 58.0 | ... | 1.0 | 9.2 | 1.6 | 8.4 | 1.1 | 23.8 | 2.2 | 0500000US42005 | Armstrong | 42005.0 |
| 3 | 136682 | 202 | 136682.0 | 84153.0 | 1035 | 61.6 | 0.7 | 84051 | 1040 | 61.5 | ... | 0.7 | 7.2 | 0.9 | 6.8 | 0.7 | 20.8 | 1.3 | 0500000US42007 | Beaver | 42007.0 |
| 4 | 40064 | 86 | 40064.0 | 23144.0 | 447 | 57.8 | 1.1 | 23140 | 447 | 57.8 | ... | 1.6 | 9.4 | 1.6 | 8.0 | 1.2 | 25.0 | 3.2 | 0500000US42009 | Bedford | 42009.0 |
5 rows × 460 columns
# Last rows of the frame plus the number of distinct values in `county`.
# `nunique` must be called: without the parentheses the cell only displays
# the bound method object instead of the count.
df.tail(), df.county.nunique()
( total_population DP03_0001M DP03_0001PE labor_force DP03_0002M \
63 43821 104 43821.0 22241.0 767
64 294500 356 294500.0 179495.0 1512
65 22487 77 22487.0 13226.0 319
66 360718 453 360718.0 237353.0 1629
67 10456049 2462 10456049.0 6566126.0 11721
DP03_0002PE DP03_0002PM civ_labor_force DP03_0003M DP03_0003PE ... \
63 50.8 1.7 22229 766 50.7 ...
64 60.9 0.5 179459 1511 60.9 ...
65 58.8 1.4 13217 318 58.8 ...
66 65.8 0.5 237053 1634 65.7 ...
67 62.8 0.1 6558087 11636 62.7 ...
DP03_0134PM DP03_0135PE DP03_0135PM DP03_0136PE DP03_0136PM \
63 1.7 6.4 1.5 7.4 1.6
64 0.5 7.2 0.7 6.4 0.5
65 1.4 8.1 1.7 7.8 1.3
66 0.5 6.8 0.8 6.3 0.5
67 0.1 8.2 0.2 9.0 0.2
DP03_0137PE DP03_0137PM GEO_ID county fips
63 24.8 2.8 0500000US42127 Wayne 42127.0
64 21.4 1.2 0500000US42129 Westmoreland 42129.0
65 21.9 2.7 0500000US42131 Wyoming 42131.0
66 20.0 1.4 0500000US42133 York 42133.0
67 23.3 0.3 0400000US42 Pennsylvania NaN
[5 rows x 460 columns],
<bound method IndexOpsMixin.nunique of 0 Adams
1 Allegheny
2 Armstrong
3 Beaver
4 Bedford
...
63 Wayne
64 Westmoreland
65 Wyoming
66 York
67 Pennsylvania
Name: county, Length: 68, dtype: object>)
# Separate the statewide "Pennsylvania" summary row (df0) from the county rows (df).
# Explicit copies are taken because new columns are assigned onto `df` later;
# assigning onto a boolean-filtered selection triggers pandas'
# SettingWithCopyWarning and leaves it ambiguous which object is modified.
is_statewide_row = df["county"] == "Pennsylvania"
df0 = df[is_statewide_row].copy()
df = df[~is_statewide_row].copy()
print(df.county.unique())
['Adams' 'Allegheny' 'Armstrong' 'Beaver' 'Bedford' 'Berks' 'Blair' 'Bradford' 'Bucks' 'Butler' 'Cambria' 'Cameron' 'Carbon' 'Centre' 'Chester' 'Clarion' 'Clearfield' 'Clinton' 'Columbia' 'Crawford' 'Cumberland' 'Dauphin' 'Delaware' 'Elk' 'Erie' 'Fayette' 'Forest' 'Franklin' 'Fulton' 'Greene' 'Huntingdon' 'Indiana' 'Jefferson' 'Juniata' 'Lackawanna' 'Lancaster' 'Lawrence' 'Lebanon' 'Lehigh' 'Luzerne' 'Lycoming' 'McKean' 'Mercer' 'Mifflin' 'Monroe' 'Montgomery' 'Montour' 'Northampton' 'Northumberland' 'Perry' 'Philadelphia' 'Pike' 'Potter' 'Schuylkill' 'Snyder' 'Somerset' 'Sullivan' 'Susquehanna' 'Tioga' 'Union' 'Venango' 'Warren' 'Washington' 'Wayne' 'Westmoreland' 'Wyoming' 'York']
# Default size for the matplotlib figures created below.
plt.rcParams["figure.figsize"] = (12, 8)

# Importing county data for Plotly Choropleth Maps
from urllib.request import urlopen
import json

# U.S. county boundaries published with the plotly sample datasets.
counties_geojson_url = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
# A timeout keeps the notebook from hanging indefinitely if the host is unreachable.
with urlopen(counties_geojson_url, timeout=30) as response:
    counties = json.load(response)
# Pennsylvania Household Income and Benefits
# Bar chart of the statewide household counts in each income bracket (uses df0,
# the "Pennsylvania" summary row).
# Matplotlib 3.6 deprecated the bundled "seaborn-dark" style in favour of
# "seaborn-v0_8-dark"; prefer the new name and fall back for older releases.
dark_style = "seaborn-v0_8-dark" if "seaborn-v0_8-dark" in plt.style.available else "seaborn-dark"
plt.style.use(dark_style)
household_income_brackets = [
    "household_less_than_10k", "household_10k_to_15k", "household_15k_to_25k",
    "household_25k_to_35k", "household_35k_to_50k", "household_50k_to_75k",
    "household_75k_to_100k", "household_100k_to_150k", "household_150k_to_200k",
    "household_200k_plus",
]
plot = df0[["county"] + household_income_brackets].plot(x="county", kind="bar", cmap="Spectral")
plt.grid(axis='y', alpha=0.3)
# Keep the single x tick label horizontal (a rotation of 360 degrees is the same as 0).
plot.set_xticklabels(plot.get_xticklabels(), rotation=0, fontsize=20)
plt.title("Distribution of Pennsylvania Total Household Income", fontsize=25)
# Anchor the legend just outside the right edge of the axes so it does not cover the bars.
plt.legend(bbox_to_anchor=(1.02, 1), loc=2, borderaxespad=0, fontsize=15)
/var/folders/2r/y399hm9d0hv3ysx7v92kl9l80000gn/T/ipykernel_57748/2960315682.py:3: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.
plt.style.use('seaborn-dark')
<matplotlib.legend.Legend at 0x7fb2c1153a00>
# Share of households with income below $50,000, mapped by county
under_50k_brackets = [
    "household_less_than_10k",
    "household_10k_to_15k",
    "household_15k_to_25k",
    "household_25k_to_35k",
    "household_35k_to_50k",
]
# Accumulate the bracket counts left to right, in the same order as a chained "+".
under_50k_total = df[under_50k_brackets[0]]
for bracket in under_50k_brackets[1:]:
    under_50k_total = under_50k_total + df[bracket]
df["household_less_than_50k"] = under_50k_total
df["household_less_than_50k_percentage"] = (under_50k_total / df["total_households"]) * 100

# County choropleth coloured by that percentage; counties are matched to the
# GeoJSON features through the "fips" column.
fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations="fips",
    color="household_less_than_50k_percentage",
    color_continuous_scale="Reds",
    mapbox_style="carto-darkmatter",
    zoom=6.25,
    center={"lat": 41, "lon": -77.65},
    hover_name="county",
    labels={"household_less_than_50k_percentage": "% Household < $50,000 💰"},
)
# Remove the outer margins and apply the light template in a single layout update.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}, template="plotly_white")
fig.show()
# Share of households with income of $100,000 or more, mapped by county
over_100k_brackets = [
    "household_100k_to_150k",
    "household_150k_to_200k",
    "household_200k_plus",
]
# Accumulate the bracket counts left to right, in the same order as a chained "+".
over_100k_total = df[over_100k_brackets[0]]
for bracket in over_100k_brackets[1:]:
    over_100k_total = over_100k_total + df[bracket]
df["household_100k_plus"] = over_100k_total
df["household_100k_plus_percentage"] = (over_100k_total / df["total_households"]) * 100

# County choropleth coloured by that percentage; counties are matched to the
# GeoJSON features through the "fips" column.
fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations="fips",
    color="household_100k_plus_percentage",
    color_continuous_scale="Greens",
    mapbox_style="carto-darkmatter",
    zoom=6.25,
    center={"lat": 41, "lon": -78},
    hover_name="county",
    labels={"household_100k_plus_percentage": "% Household > $100,000 💰"},
)
# Remove the outer margins and apply the light template in a single layout update.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}, template="plotly_white")
fig.show()
# Total Employment Rate
# Employed people as a percentage of the civilian labor force.
# The denominator is `civ_labor_force` rather than `labor_force` for two reasons:
#   * `labor_force` has a missing value (see the isnull() summary), which would
#     leave that county without a rate and blank on the map;
#   * `labor_force` appears to include the armed forces (it is listed alongside a
#     separate `armed_forces` column), whereas the employed count is presumably
#     civilian-only — NOTE(review): confirm against the ACS DP03 table shell.
df["employment_rate"] = (df["total_employed"] / df["civ_labor_force"]) * 100

# County choropleth of the rate; the colour scale is clamped to 91.5-97.5 so the
# differences between counties remain visible.
fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations="fips",
    color="employment_rate",
    color_continuous_scale="RdBu_r",
    range_color=(91.5, 97.5),
    mapbox_style="carto-darkmatter",
    zoom=6.25,
    center={"lat": 41, "lon": -77.65},
    hover_name="county",
    hover_data=["total_employed", "total_unemployed"],
    labels={
        "employment_rate": "Employment Rate",
        "total_employed": "Total Employed",
        "total_unemployed": "Total Unemployed",
    },
)
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.update_layout(template="plotly_dark")
fig.show()
# Employment Rate Percentage by County
# Male figures are derived by subtracting the female columns from the totals.
df["labor_force_male"] = df["labor_force"] - df["labor_force_female"]
df["civ_labor_force_male"] = df["civ_labor_force"] - df["civ_labor_force_female"]
df["total_male_employed"] = df["total_employed"] - df["civ_labor_force_female_employed"]
# Both rates are taken relative to the *civilian* labor force so the male and
# female series are directly comparable. The male rate was previously divided by
# the total labor force, which is a different base from the female rate and also
# propagates the missing `labor_force` value into the result.
df["employment_rate_male"] = (df["total_male_employed"] / df["civ_labor_force_male"]) * 100
df["employment_rate_female"] = (df["civ_labor_force_female_employed"] / df["civ_labor_force_female"]) * 100

plt.style.use("Solarize_Light2")
x1 = df.employment_rate_male
x2 = df.employment_rate_female
# Overall rate; this column must already exist on df (it is computed in the
# "Total Employment Rate" step).
x3 = df.employment_rate
y = df.county

# Tall, narrow figure: one row per county on the y axis.
plt.figure(figsize=(6, 14), dpi=80)
plt.scatter(x1, y, color="#0000FF", edgecolors="#000000", s=50, alpha=0.75, label="Male Employment Rate %")
plt.scatter(x2, y, color="#FF00FF", edgecolors="#000000", s=50, alpha=0.75, label="Female Employment Rate %")
plt.plot(x3, y, color="#000000", alpha=0.5, linestyle="dashed", label="Total Employment Rate %")
plt.grid(color="#d3d3d3", linestyle='-', linewidth=0.75)
plt.title("Employment Rate % by County")
plt.xlabel("Employment Rate Percentage")
plt.ylabel("")
plt.legend(loc=2)
plt.show()
# Pennsylvania Median Earnings by County
# One marker-plus-line series per earnings column, with one row per county.
plt.style.use("Solarize_Light2")
county_names = df["county"]
# (column, scatter keyword arguments, line keyword arguments), in drawing order.
earnings_series = [
    (
        "median_earnings_for_workers",
        {"color": "#000000", "alpha": 1, "s": 12, "label": "All Workers"},
        {"color": "#000000", "alpha": 0.75},
    ),
    (
        "median_earnings_male_fulltime",
        {"color": "#0000FF", "edgecolors": "#000000", "label": "Male Full Time"},
        {"color": "#0000FF", "alpha": 0.75, "linestyle": "--"},
    ),
    (
        "median_earnings_female_fulltime",
        {"color": "#FF00FF", "edgecolors": "#000000", "label": "Female Full Time"},
        {"color": "#FF00FF", "alpha": 0.75, "linestyle": "--"},
    ),
]

plt.figure(figsize=(8, 14), dpi=80)
for earnings_column, marker_kwargs, line_kwargs in earnings_series:
    earnings = df[earnings_column]
    plt.scatter(earnings, county_names, **marker_kwargs)
    plt.plot(earnings, county_names, **line_kwargs)

plt.grid(color="#d3d3d3", linestyle='-', linewidth=2)
plt.title("Pennsylvania Median Earnings by County")
plt.xlabel("Median Earnings (dollars)")
plt.ylabel("")
plt.legend(loc=1)
plt.show()
# Pennsylvania Per Capita Income by County
# Matplotlib 3.6 deprecated the bundled "seaborn-dark" style in favour of
# "seaborn-v0_8-dark"; prefer the new name and fall back for older releases.
dark_style = "seaborn-v0_8-dark" if "seaborn-v0_8-dark" in plt.style.available else "seaborn-dark"
plt.style.use(dark_style)
x = df.per_capita_income
y = df.county

# Tall figure: one row per county on the y axis.
plt.figure(figsize=(8, 14), dpi=80)
plt.scatter(x, y, color="#00DB16", alpha=1, s=100, edgecolors="#d3d3d3", label="Per Capita Income (USD)")
# Dotted connector between the markers, in row order.
plt.plot(x, y, color="#00DB16", linestyle="dotted")
plt.grid(color="#d3d3d3", linestyle='-', linewidth=0.25)
plt.title("Pennsylvania Per Capita Income by County")
plt.xlabel("Per Capita Income (dollars)")
plt.ylabel("")
plt.legend(loc=1)
plt.show()
/var/folders/2r/y399hm9d0hv3ysx7v92kl9l80000gn/T/ipykernel_57748/3704097254.py:4: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.
# Share of commuting workers who worked from home, mapped by county
home_workers = df["worked_from_home"]
all_commuters = df["total_workers_commute"]
df["worked_from_home_percentage"] = (home_workers / all_commuters) * 100

# County choropleth coloured by that percentage; counties are matched to the
# GeoJSON features through the "fips" column.
fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations="fips",
    color="worked_from_home_percentage",
    color_continuous_scale="Viridis",
    mapbox_style="carto-darkmatter",
    zoom=6.25,
    center={"lat": 41, "lon": -77.65},
    hover_name="county",
    labels={"worked_from_home_percentage": "% Working From Home"},
)
# Remove the outer margins and apply the dark template in a single layout update.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}, template="plotly_dark")
fig.show()
# Share of the civilian population with health insurance coverage, mapped by county
insured_population = df["civ_health_insurance_coverage"]
civilian_population = df["total_civ_population"]
df["civ_health_insurance_coverage_percentage"] = (insured_population / civilian_population) * 100

# County choropleth coloured by that percentage; counties are matched to the
# GeoJSON features through the "fips" column.
fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations="fips",
    color="civ_health_insurance_coverage_percentage",
    color_continuous_scale="Picnic",
    mapbox_style="carto-darkmatter",
    zoom=6.25,
    center={"lat": 41, "lon": -77.65},
    hover_name="county",
    labels={"civ_health_insurance_coverage_percentage": "Percentage w/ Health Insurance 🏥"},
)
# Remove the outer margins and apply the dark template in a single layout update.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}, template="plotly_dark")
fig.show()
#!pip install geopandas==0.8.1
#!pip install pyshp==1.2.10
#!pip install shapely==1.6.3
#import plotly.figure_factory as ff
#fig = ff.create_choropleth(fips=df.fips,
# scope=['PA'],
# values=df.total_population,
# title='PA total population by County',
# legend_title='')
#fig.layout.template = None
#fig.show()
#import plotly.figure_factory as ff
#values = range(len(df.fips))
#fig = ff.create_choropleth(fips=df.fips, values=values)
#fig.layout.template = None
#fig.show()